library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
# Load the processing functions. This also loads the normalization functions.
source("safegraph_process_patterns_functions.R")
# SET your path here to the covid19analysis folder.
sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'
# SET your path here to the github covid folder
github_path <- ''
Comparing different metrics that assess social distancing. First load the data.
scc_blockgroups <-
block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
sf_blockgroups <-
block_groups("CA","San Francisco", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
# zip code areas
zctas_94 <-
zctas(cb = F, starts_with = "94") %>%
st_transform('+proj=longlat +datum=WGS84') %>%
dplyr::select(ZCTA5CE10, geometry)
zctas_95 <-
zctas(cb = F, starts_with = "95") %>%
st_transform('+proj=longlat +datum=WGS84') %>%
dplyr::select(ZCTA5CE10, geometry)
zctas_94_95 <- rbind(zctas_94, zctas_95)
# join with block groups
scc_bg_zctas <- scc_blockgroups %>%
st_centroid() %>%
st_join(zctas_94_95) %>%
dplyr::select(GEOID, ZCTA5CE10) %>%
rename(blockgroup = GEOID, zipcode = ZCTA5CE10)
sf_bg_zctas <- sf_blockgroups %>%
st_centroid() %>%
st_join(zctas_94) %>%
dplyr::select(GEOID, ZCTA5CE10) %>%
rename(blockgroup = GEOID, zipcode = ZCTA5CE10)
# Safegraph social distancing data
bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>%
mutate(date = date_range_start %>% substr(1,10) %>% as.Date())
# patterns data
sfc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-09_origins_daily.rds"))
sfc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-16_origins_daily.rds"))
marin_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/mac_patterns_2020-03-09_origins_daily.rds"))
smc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/smc_patterns_2020-03-09_origins_daily.rds"))
scc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-09_origins_daily.rds"))
ccc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/ccc_patterns_2020-03-09_origins_daily.rds"))
alc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/alc_patterns_2020-03-09_origins_daily.rds"))
marin_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/mac_patterns_2020-03-16_origins_daily.rds"))
smc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/smc_patterns_2020-03-16_origins_daily.rds"))
scc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-16_origins_daily.rds"))
ccc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/ccc_patterns_2020-03-16_origins_daily.rds"))
alc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/alc_patterns_2020-03-16_origins_daily.rds"))
# census data processing
acs_vars = readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/censusData2018_acs_acs5.rds")
sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)
# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
regionString <- paste0("state:06+county:", county)
censusData <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = regionString,
vars = variableToPull
) %>%
mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
return(censusData)
}
For now, just going to look at SF block groups. Also filter social distancing data to be in the date ranges of the patterns data.
sfc_patterns_origins_daily_week_pre_in_sf <- sfc_patterns_origins_daily_week_pre %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
sfc_patterns_origins_daily_week1_in_sf <- sfc_patterns_origins_daily_week1 %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
marin_patterns_origins_daily_week1_in_sf <- marin_patterns_origins_daily_week1 %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
marin_patterns_origins_daily_week_pre_in_sf <- marin_patterns_origins_daily_week_pre %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
smc_patterns_origins_daily_week1_in_sf <- smc_patterns_origins_daily_week1 %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
smc_patterns_origins_daily_week_pre_in_sf <- smc_patterns_origins_daily_week_pre %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
scc_patterns_origins_daily_week1_in_sf <- scc_patterns_origins_daily_week1 %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
scc_patterns_origins_daily_week_pre_in_sf <- scc_patterns_origins_daily_week_pre %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
ccc_patterns_origins_daily_week1_in_sf <- ccc_patterns_origins_daily_week1 %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
ccc_patterns_origins_daily_week_pre_in_sf <- ccc_patterns_origins_daily_week_pre %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
alc_patterns_origins_daily_week1_in_sf <- alc_patterns_origins_daily_week1 %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
alc_patterns_origins_daily_week_pre_in_sf <- alc_patterns_origins_daily_week_pre %>%
filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)
combined_sf_week_pre_1_by_date <- rbind(sfc_patterns_origins_daily_week_pre_in_sf, sfc_patterns_origins_daily_week1_in_sf, marin_patterns_origins_daily_week1_in_sf, marin_patterns_origins_daily_week_pre_in_sf, smc_patterns_origins_daily_week1_in_sf, smc_patterns_origins_daily_week_pre_in_sf, scc_patterns_origins_daily_week1_in_sf, scc_patterns_origins_daily_week_pre_in_sf, ccc_patterns_origins_daily_week1_in_sf, ccc_patterns_origins_daily_week_pre_in_sf, alc_patterns_origins_daily_week1_in_sf, alc_patterns_origins_daily_week_pre_in_sf) %>%
group_by(origin_census_block_group, date) %>%
summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
filter(!is.na(visit_counts_high_zip)) %>%
mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)
sf_sd_pre_1 <- bay_sd %>%
filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>%
filter(date >= min(combined_sf_week_pre_1_by_date$date) & date <= max(combined_sf_week_pre_1_by_date$date)) %>%
left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>%
filter(!is.na(zipcode)) %>%
group_by(origin_census_block_group, date) %>%
summarize(total_at_home = sum(completely_home_device_count), total_devices = sum(device_count), dwell_time = sum(median_non_home_dwell_time), distance_traveled = sum(distance_traveled_from_home)) %>%
mutate(
percent_at_home = total_at_home*100/total_devices,
percent_leaving_home = (100 - percent_at_home)
)
# join these
sf_sd_visits_pre_1 <- left_join(combined_sf_week_pre_1_by_date, sf_sd_pre_1)
Get population data.
sf_pop_block <- pullCensus("B01003_001E", sf_fips) %>%
left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
rename(total_pop = B01003_001E)
sf_sd_visits_pre_1 <- sf_sd_visits_pre_1 %>% left_join(sf_pop_block, by = c("origin_census_block_group" = "blockgroup")) %>%
mutate(visits_per_capita = visit_counts_avg/ total_pop, dwell_per_capita = dwell_time/total_pop, distance_per_capita = distance_traveled/total_pop)
sf_sd_visits_pre_1_zip <- sf_sd_visits_pre_1 %>%
group_by(zipcode, date) %>%
summarize(total_at_home = sum(total_at_home), total_devices = sum(total_devices), dwell_time = sum(dwell_time), distance_traveled = sum(distance_traveled), visit_counts_avg = sum(visit_counts_avg), total_pop_zip = sum(total_pop)) %>%
mutate(
percent_at_home = total_at_home*100/total_devices,
percent_leaving_home = (100 - percent_at_home),
visits_per_capita = visit_counts_avg/ total_pop_zip,
dwell_per_capita = dwell_time/total_pop_zip,
distance_per_capita = distance_traveled/total_pop_zip
) %>%
filter(!is.na(zipcode))
fig_test <- sf_sd_visits_pre_1_zip %>% filter(date == min(date)) %>% plot_ly() %>%
add_trace(x = ~zipcode, y = ~percent_leaving_home/10, type = 'scatter', mode = 'markers',name = "percent leaving/10") %>%
add_trace(x = ~zipcode, y = ~visits_per_capita, type = 'scatter', mode = 'markers',name = "visits per capita") %>%
add_trace(x = ~zipcode, y = ~dwell_per_capita, type = 'scatter', mode = 'markers',name = "dwell per capita") %>%
add_trace(x = ~zipcode, y = ~distance_per_capita, type = 'scatter', mode = 'markers',name = "distance per capita")
fig_test
Mapping for a specific day.
sf_sd_visits_3_9_geom <- sf_sd_visits_pre_1 %>% filter(date == min(date)) %>% st_as_sf() %>% ungroup()
mapview(sf_sd_visits_3_9_geom %>% dplyr::select(percent_leaving_home))
mapview(sf_sd_visits_3_9_geom %>% dplyr::select(visits_per_capita))
# mapview(sf_sd_visits_3_9_geom %>% dplyr::select(distance_per_capita))
# mapview(sf_sd_visits_3_9_geom %>% dplyr::select(dwell_per_capita))
# by zip code
sf_sd_visits_3_9_geom_zip <- sf_sd_visits_pre_1_zip %>% filter(date == min(date)) %>%
left_join(zctas_94, by = c("zipcode" = "ZCTA5CE10")) %>%
st_as_sf() %>% ungroup()
mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(percent_leaving_home))
mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(visits_per_capita))
# mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(distance_per_capita))
#
# mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(dwell_per_capita))